gvc_agora_opentargets

TODO

Setup environment

library(tidyverse)
library(janitor)
library(broom)
library(readxl)
library(jsonlite)

library(gprofiler2)

# Fix the RNG seed so the row shuffles (`sample_frac()`) used further down are
# reproducible from run to run.
set.seed(666)

# Black-and-white ggplot2 theme for every plot drawn afterwards.
theme_set(theme_bw())

Read and prep data

GVC

Genes within 1Mb window of (each side of?) GVC loci from Fanny:

# Load the GVC gene table, normalise the column names, split `gene_id` into an
# ID part and a `version` part, and drop the columns that are not used here.
gvc <- read_xlsx("GVC_1Mb_comparison_050224.xlsx") %>%
  clean_names() %>%
  separate(col = gene_id, into = c("gene_id", "version")) %>%
  select(-c(version, agora_nominated_list, opentarget_info))

gvc

# One row per gene ID (first occurrence wins), sorted by gene symbol.
gvc.genes <- gvc %>%
  distinct(gene_id, .keep_all = TRUE) %>%
  select(gene_id, gene_symbol) %>%
  arrange(gene_symbol)

gvc.genes

# Number of unique gene symbols among the GVC genes.
gvc.genes %>%
  distinct(gene_symbol) %>%
  nrow()
[1] 1344

Agora

Alzheimer’s disease gene prioritization scores from Agora (see also related journal article):

# Parse the Agora overall-scores JSON export into a tibble.
ago1 <- as_tibble(
  read_json(
    "agora.syn25741025.overall_scores.v12.2024-10-24.json",
    simplifyVector = TRUE
  )
)

ago1

Alzheimer’s disease genes (Agora nominated targets):

https://agora.adknowledgeportal.org/genes/nominated-targets

# Agora nominated-target gene list.
ago2 <- read_csv("agora.nominated-targets.gene-list.2024-10-24.csv")
ago2

# Keep only the scored genes whose HGNC symbol is on the nominated-target list.
ago <- filter(ago1, hgnc_symbol %in% ago2[["Gene Symbol"]])

OpenTargets

Alzheimer’s disease gene prioritization scores from OpenTargets:

# Earlier export (v24_03), kept for reference:
# ot <- read_tsv("OT-MONDO_0004975-associated-targets-6_4_2024-v24_03.tsv", show_col_types = FALSE, na = "No data")

# OpenTargets associated-targets export (v24_09); "No data" cells become NA.
ot <- read_tsv(
  "OT-MONDO_0004975-associated-targets-10_24_2024-v24_09.tsv",
  na = "No data",
  show_col_types = FALSE
)

ot

Add Ensembl gene IDs by converting the OpenTargets gene symbols with g:Profiler's g:Convert:

# Convert the OpenTargets gene symbols to Ensembl gene IDs with g:Convert and
# attach the original OpenTargets columns to every mapping that is returned.
otcols <- colnames(ot)
otensg <- gconvert(
  query = ot$symbol,
  organism = "hsapiens",
  target = "ENSG",
  mthreshold = Inf,
  filter_na = TRUE
) %>%
  # `input_number` is matched against the row number of `ot`, so it has to be
  # character like the key produced by `rownames_to_column()`.
  mutate(input_number = as.character(input_number)) %>%
  left_join(ot %>% rownames_to_column(var = "input_number"), by = "input_number") %>%
  # `all_of()` marks `otcols` as an external character vector of column names;
  # passing it bare is deprecated in tidyselect and would silently pick a data
  # column if one were ever named `otcols`.
  select(ensembl_gene_id = target, all_of(otcols))

otensg

Correlation of Agora and OpenTargets scores

GVC genes

# Annotate the GVC genes with Agora and then OpenTargets columns, matching on
# the Ensembl gene ID; genes without a match keep NA in the joined columns.
d.cor <- left_join(
  left_join(gvc.genes, ago, by = join_by(gene_id == ensembl_gene_id)),
  otensg,
  by = join_by(gene_id == ensembl_gene_id)
)
nrow(d.cor)
[1] 1347
# Rows with both genetics_score and otGeneticsPortal present.
nrow(drop_na(d.cor, genetics_score, otGeneticsPortal))
[1] 56
# Kendall rank correlation between genetics_score and otGeneticsPortal,
# computed on the rows where both are present.
d.cor %>%
  drop_na(genetics_score, otGeneticsPortal) %>%
  summarise(
    cor = tidy(cor.test(x = genetics_score, y = otGeneticsPortal, method = "kendall"))
  ) %>%
  unnest(cols = cor)
nrow(d.cor)
[1] 1347
# Rows with both target_risk_score and globalScore present.
nrow(drop_na(d.cor, target_risk_score, globalScore))
[1] 75
# Kendall rank correlation between target_risk_score and globalScore,
# computed on the rows where both are present.
d.cor %>%
  drop_na(target_risk_score, globalScore) %>%
  summarise(
    cor = tidy(cor.test(x = target_risk_score, y = globalScore, method = "kendall"))
  ) %>%
  unnest(cols = cor)

All genes

# Agora nominated genes annotated with OpenTargets columns (NA where absent).
d.cor <- left_join(ago, otensg, by = join_by(ensembl_gene_id))
nrow(d.cor)
[1] 925
# Rows with both genetics_score and otGeneticsPortal present.
nrow(drop_na(d.cor, genetics_score, otGeneticsPortal))
[1] 75
# Kendall rank correlation between genetics_score and otGeneticsPortal,
# computed on the rows where both are present.
d.cor %>%
  drop_na(genetics_score, otGeneticsPortal) %>%
  summarise(
    cor = tidy(cor.test(x = genetics_score, y = otGeneticsPortal, method = "kendall"))
  ) %>%
  unnest(cols = cor)
nrow(d.cor)
[1] 925
# Rows with both target_risk_score and globalScore present.
nrow(drop_na(d.cor, target_risk_score, globalScore))
[1] 488
# Kendall rank correlation between target_risk_score and globalScore,
# computed on the rows where both are present.
d.cor %>%
  drop_na(target_risk_score, globalScore) %>%
  summarise(
    cor = tidy(cor.test(x = target_risk_score, y = globalScore, method = "kendall"))
  ) %>%
  unnest(cols = cor)

Overlaps between GVC, Agora, and OpenTargets genes

library(VennDiagram)

# Gene ID sets to compare. The list names label the Venn circles and are the
# building blocks of the `..set..` region names filtered on further down.
x <- list(
  GVC = gvc.genes[["gene_id"]],
  Agora = ago[["ensembl_gene_id"]],
  OpenTargets = otensg[["ensembl_gene_id"]]
)

# Draw on a fresh page; `filename = NULL` makes venn.diagram() return the grid
# object instead of writing an image file.
grid.newpage()
v <- venn.diagram(
  x = x,
  filename = NULL,
  fill = c("#FF0000", "#00FF00", "#0000FF")
)
grid.draw(v)

# One row per Venn region, with the member gene IDs in `..values..`.
p <- get.venn.partitions(x)
p

ORA of genes in overlaps

GVC ∩ Agora ∩ OpenTargets

# Genes in the triple intersection, annotated with Agora and OpenTargets
# columns. Rows are shuffled before sorting so that rows tied on every sort key
# are not left in input order.
genes <- p %>%
  filter(..set.. == "GVC∩Agora∩OpenTargets") %>%
  unnest(cols = ..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  select(
    gene_id, symbol,
    genetics_score, otGeneticsPortal,
    globalScore, target_risk_score, multi_omics_score
  ) %>%
  arrange(
    desc(genetics_score), desc(otGeneticsPortal),
    desc(target_risk_score), desc(globalScore)
  )

genes

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(genes$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

# save overlap gene ids for later
overlap_gene_ids <- query

GVC ∩ Agora

# Genes in both GVC and Agora (with or without OpenTargets), annotated with
# Agora and OpenTargets columns. Rows are shuffled before sorting so that rows
# tied on every sort key are not left in input order.
genes <- p %>%
  filter(
    ..set.. %in% c(
      "GVC∩Agora∩OpenTargets",
      "(GVC∩Agora)∖(OpenTargets)"
    )
  ) %>%
  unnest(cols = ..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  select(
    gene_id, symbol,
    genetics_score, otGeneticsPortal,
    globalScore, target_risk_score, multi_omics_score
  ) %>%
  arrange(
    desc(genetics_score), desc(otGeneticsPortal),
    desc(target_risk_score), desc(globalScore)
  )

genes

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(genes$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

GVC ∩ OpenTargets

# Genes in both GVC and OpenTargets (with or without Agora), annotated with
# Agora and OpenTargets columns. Rows are shuffled before sorting so that rows
# tied on every sort key are not left in input order.
genes <- p %>%
  filter(
    ..set.. %in% c(
      "GVC∩Agora∩OpenTargets",
      "(GVC∩OpenTargets)∖(Agora)"
    )
  ) %>%
  unnest(cols = ..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  select(
    gene_id, symbol,
    genetics_score, otGeneticsPortal,
    globalScore, target_risk_score, multi_omics_score
  ) %>%
  arrange(
    desc(genetics_score), desc(otGeneticsPortal),
    desc(target_risk_score), desc(globalScore)
  )

genes

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(genes$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

Agora ∩ OpenTargets

# Genes in both Agora and OpenTargets (with or without GVC), annotated with
# Agora and OpenTargets columns. Rows are shuffled before sorting so that rows
# tied on every sort key are not left in input order.
genes <- p %>%
  filter(
    ..set.. %in% c(
      "GVC∩Agora∩OpenTargets",
      "(Agora∩OpenTargets)∖(GVC)"
    )
  ) %>%
  unnest(cols = ..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  select(
    gene_id, symbol,
    genetics_score, otGeneticsPortal,
    globalScore, target_risk_score, multi_omics_score
  ) %>%
  arrange(
    desc(genetics_score), desc(otGeneticsPortal),
    desc(target_risk_score), desc(globalScore)
  )

genes

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(genes$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

(GVC ∩ Agora) ∪ (GVC ∩ OpenTargets) ∪ (Agora ∩ OpenTargets)

# Genes found in at least two of the three sets, annotated with Agora and
# OpenTargets columns. Rows are shuffled before sorting so that rows tied on
# every sort key are not left in input order.
genes <- p %>%
  filter(
    ..set.. %in% c(
      "GVC∩Agora∩OpenTargets",
      "(GVC∩Agora)∖(OpenTargets)",
      "(GVC∩OpenTargets)∖(Agora)",
      "(Agora∩OpenTargets)∖(GVC)"
    )
  ) %>%
  unnest(cols = ..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  select(
    gene_id, symbol,
    genetics_score, otGeneticsPortal,
    globalScore, target_risk_score, multi_omics_score
  ) %>%
  arrange(
    desc(genetics_score), desc(otGeneticsPortal),
    desc(target_risk_score), desc(globalScore)
  )

genes

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(genes$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

(Agora ∩ OpenTargets) ∖ (GVC)

# Genes in Agora and OpenTargets but not in GVC, annotated with Agora and
# OpenTargets columns. Rows are shuffled before sorting so that rows tied on
# every sort key are not left in input order.
genes <- p %>%
  filter(..set.. == "(Agora∩OpenTargets)∖(GVC)") %>%
  unnest(cols = ..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  select(
    gene_id, symbol,
    genetics_score, otGeneticsPortal,
    globalScore, target_risk_score, multi_omics_score
  ) %>%
  arrange(
    desc(genetics_score), desc(otGeneticsPortal),
    desc(target_risk_score), desc(globalScore)
  )

genes

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(genes$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

(GVC ∩ OpenTargets) ∖ (Agora)

# Genes in GVC and OpenTargets but not in Agora, annotated with Agora and
# OpenTargets columns. Rows are shuffled before sorting so that rows tied on
# every sort key are not left in input order.
genes <- p %>%
  filter(..set.. == "(GVC∩OpenTargets)∖(Agora)") %>%
  unnest(cols = ..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  select(
    gene_id, symbol,
    genetics_score, otGeneticsPortal,
    globalScore, target_risk_score, multi_omics_score
  ) %>%
  arrange(
    desc(genetics_score), desc(otGeneticsPortal),
    desc(target_risk_score), desc(globalScore)
  )

genes

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(genes$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

(OpenTargets) ∖ (GVC ∪ Agora)

# Genes found only in OpenTargets, annotated with Agora and OpenTargets
# columns. Rows are shuffled before sorting so that rows tied on every sort key
# are not left in input order.
genes <- p %>%
  filter(..set.. == "(OpenTargets)∖(GVC∪Agora)") %>%
  unnest(cols = ..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  select(
    gene_id, symbol,
    genetics_score, otGeneticsPortal,
    globalScore, target_risk_score, multi_omics_score
  ) %>%
  arrange(
    desc(genetics_score), desc(otGeneticsPortal),
    desc(target_risk_score), desc(globalScore)
  )

genes

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(genes$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

(GVC ∩ Agora) ∖ (OpenTargets)

# Genes in GVC and Agora but not in OpenTargets, annotated with Agora and
# OpenTargets columns. Rows are shuffled before sorting so that rows tied on
# every sort key are not left in input order.
genes <- p %>%
  filter(..set.. == "(GVC∩Agora)∖(OpenTargets)") %>%
  unnest(cols = ..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  select(
    gene_id, symbol,
    genetics_score, otGeneticsPortal,
    globalScore, target_risk_score, multi_omics_score
  ) %>%
  arrange(
    desc(genetics_score), desc(otGeneticsPortal),
    desc(target_risk_score), desc(globalScore)
  )

genes

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(genes$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

(Agora) ∖ (GVC ∪ OpenTargets)

# Genes found only in Agora, annotated with Agora and OpenTargets columns.
# Rows are shuffled before sorting so that rows tied on every sort key are not
# left in input order.
genes <- p %>%
  filter(..set.. == "(Agora)∖(GVC∪OpenTargets)") %>%
  unnest(cols = ..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  select(
    gene_id, symbol,
    genetics_score, otGeneticsPortal,
    globalScore, target_risk_score, multi_omics_score
  ) %>%
  arrange(
    desc(genetics_score), desc(otGeneticsPortal),
    desc(target_risk_score), desc(globalScore)
  )

genes

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(genes$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

(GVC) ∖ (Agora ∪ OpenTargets)

# Genes found only in GVC, annotated with Agora and OpenTargets columns.
# Rows are shuffled before sorting so that rows tied on every sort key are not
# left in input order.
genes <- p %>%
  filter(..set.. == "(GVC)∖(Agora∪OpenTargets)") %>%
  unnest(cols = ..values..) %>%
  select(gene_id = ..values..) %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  select(
    gene_id, symbol,
    genetics_score, otGeneticsPortal,
    globalScore, target_risk_score, multi_omics_score
  ) %>%
  arrange(
    desc(genetics_score), desc(otGeneticsPortal),
    desc(target_risk_score), desc(globalScore)
  )

genes

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(genes$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

ORA of GVC genes sorted by Agora or OpenTargets scores

GVC genes sorted by Agora’s genetics_score

Arrange by Agora’s genetics_score and OpenTargets’ otGeneticsPortal:

# GVC genes with Agora and OpenTargets columns, ranked by genetics_score first
# and otGeneticsPortal second. Rows are shuffled before sorting so that rows
# tied on every sort key are not left in input order.
d1 <- gvc.genes %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  arrange(
    desc(genetics_score), desc(otGeneticsPortal),
    desc(target_risk_score), desc(globalScore)
  ) %>%
  select(-c(symbol, hgnc_symbol)) %>%
  relocate(gene_id, gene_symbol, genetics_score, otGeneticsPortal)

d1

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(d1$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

GVC genes sorted by OpenTargets’ otGeneticsPortal

Arrange by OpenTargets’ otGeneticsPortal and Agora’s genetics_score:

# GVC genes with Agora and OpenTargets columns, ranked by otGeneticsPortal
# first and genetics_score second. Rows are shuffled before sorting so that
# rows tied on every sort key are not left in input order.
d2 <- gvc.genes %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  arrange(
    desc(otGeneticsPortal), desc(genetics_score),
    desc(globalScore), desc(target_risk_score)
  ) %>%
  select(-c(symbol, hgnc_symbol)) %>%
  relocate(gene_id, gene_symbol, otGeneticsPortal, genetics_score)

d2

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(d2$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

GVC genes sorted by Agora’s target_risk_score

Arrange by Agora’s target_risk_score and OpenTargets’ globalScore:

# GVC genes with Agora and OpenTargets columns, ranked by target_risk_score
# first and globalScore second. Rows are shuffled before sorting so that rows
# tied on both sort keys are not left in input order.
d3 <- gvc.genes %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  arrange(desc(target_risk_score), desc(globalScore)) %>%
  select(-c(symbol, hgnc_symbol)) %>%
  relocate(gene_id, gene_symbol, target_risk_score, globalScore)

d3

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(d3$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

GVC genes sorted by OpenTargets’ globalScore

Arrange by OpenTargets’ globalScore and Agora’s target_risk_score:

# GVC genes with Agora and OpenTargets columns, ranked by globalScore first and
# target_risk_score second. Rows are shuffled before sorting so that rows tied
# on both sort keys are not left in input order.
d4 <- gvc.genes %>%
  left_join(ago, by = join_by(gene_id == ensembl_gene_id)) %>%
  left_join(otensg, by = join_by(gene_id == ensembl_gene_id)) %>%
  sample_frac(1L) %>%
  arrange(desc(globalScore), desc(target_risk_score)) %>%
  select(-c(symbol, hgnc_symbol)) %>%
  relocate(gene_id, gene_symbol, globalScore, target_risk_score)

d4

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(d4$gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

ORA of Agora and OpenTargets genes sorted by global or genetic score

Agora genes sorted by genetics_score

# Agora genes that have a genetics_score, highest score first. Rows are
# shuffled before sorting so that tied rows are not left in input order.
d5 <- ago %>%
  drop_na(genetics_score) %>%
  sample_frac(1L) %>%
  arrange(desc(genetics_score))

d5

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(d5$ensembl_gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

Agora genes sorted by target_risk_score

# Agora genes that have a target_risk_score, highest score first. Rows are
# shuffled before sorting so that tied rows are not left in input order.
d6 <- ago %>%
  drop_na(target_risk_score) %>%
  sample_frac(1L) %>%
  arrange(desc(target_risk_score))

d6

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(d6$ensembl_gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

Agora genes sorted by multi_omics_score

# Agora genes that have a multi_omics_score, highest score first. Rows are
# shuffled before sorting so that tied rows are not left in input order.
d7 <- ago %>%
  drop_na(multi_omics_score) %>%
  sample_frac(1L) %>%
  arrange(desc(multi_omics_score))

d7

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(d7$ensembl_gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

OpenTargets genes sorted by otGeneticsPortal

# OpenTargets genes that have an otGeneticsPortal value, highest first. Rows
# are shuffled before sorting so that tied rows are not left in input order.
d8 <- otensg %>%
  drop_na(otGeneticsPortal) %>%
  sample_frac(1L) %>%
  arrange(desc(otGeneticsPortal))

d8

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(d8$ensembl_gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

OpenTargets genes sorted by globalScore

# OpenTargets genes that have a globalScore, highest first. Rows are shuffled
# before sorting so that tied rows are not left in input order.
d9 <- otensg %>%
  drop_na(globalScore) %>%
  sample_frac(1L) %>%
  arrange(desc(globalScore))

d9

# Ranked gene IDs without repeats (first occurrence kept) for the ordered query.
query <- unique(d9$ensembl_gene_id)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = TRUE,
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

ORA of GVC genes in table from manuscript

# Gene list from the manuscript table ("PG Gene List" sheet, first row skipped).
d10 <- read_xlsx(
  "2024-08-29_GVC Table 1C - WORKING COPY.xlsx",
  sheet = "PG Gene List",
  skip = 1
)

arrange(d10, `PG RANK`)

# Gene symbols in rank order without repeats; APOE is appended with rank 0,
# which sorts ahead of any positive rank.
query <- d10 %>%
  rename(
    gene = `GVC expanded list of possible genes (500kb)`,
    rank = `PG RANK`
  ) %>%
  bind_rows(tibble(gene = "APOE", rank = 0)) %>%
  arrange(rank) %>%
  distinct(gene) %>%
  pull(gene)

gostres <- gost(
  query = query,
  organism = "hsapiens",
  ordered_query = FALSE, # <- UNORDERED QUERY!
  exclude_iea = TRUE,
  domain_scope = "annotated",
  significant = TRUE,
  user_threshold = 0.005,
  correction_method = "gSCS"
)

# Enrichment table with term name, ID and source moved to the front.
relocate(gostres$result, term_name, term_id, source)
gostplot(gostres, capped = FALSE, interactive = TRUE)

Check missing OpenTargets scores in table from manuscript

# Manuscript table reduced to the gene symbol and the two OpenTargets scores
# recorded there; "No data" cells are read as NA.
t <- read_xlsx(
  "8-23-2024 - GVC Table 1C - WORKING COPYL_MRC.xlsx",
  skip = 1,
  na = "No data"
) %>%
  janitor::clean_names() %>%
  select(
    symbol = gvc_expanded_list_of_possible_genes_500kb,
    open_target_scores_global,
    open_target_scores_genetics
  )

# Global score: rows where the manuscript value and the OpenTargets value
# differ at 4 decimals, or where exactly one of the two is missing.
# A bare `!=` evaluates to NA when either side is NA and `filter()` drops NA
# rows, so without the `xor(is.na(), is.na())` term a score missing on one side
# only would never be reported.
t %>%
  left_join(ot, by = "symbol") %>%
  filter(
    xor(is.na(open_target_scores_global), is.na(globalScore)) |
      round(open_target_scores_global, 4) != round(globalScore, 4)
  ) %>%
  select(symbol, open_target_scores_global, globalScore)

# Genetics score: same check against otGeneticsPortal.
t %>%
  left_join(ot, by = "symbol") %>%
  filter(
    xor(is.na(open_target_scores_genetics), is.na(otGeneticsPortal)) |
      round(open_target_scores_genetics, 4) != round(otGeneticsPortal, 4)
  ) %>%
  select(symbol, open_target_scores_genetics, otGeneticsPortal)

GVC loci annotated with genes in overlaps

GVC ∩ Agora ∩ OpenTargets

# Unique gene IDs in the triple intersection of the Venn partition table.
# The two left joins to `ago` and `otensg` that used to sit here were removed:
# a left join never drops or reorders left-hand keys, and only the distinct
# `gene_id` values are kept, so they could not affect the result.
gene_ids <- p %>%
  filter(..set.. == "GVC∩Agora∩OpenTargets") %>%
  unnest(..values..) %>%
  select(gene_id = ..values..) %>%
  distinct(gene_id) %>%
  pull(gene_id)

length(gene_ids)
[1] 75
# For each GVC locus, list the symbols of the selected genes, one row per
# locus, with symbols in alphabetical order joined by " | ".
gvc %>%
  filter(gene_id %in% gene_ids) %>%
  select(gvc_locus = grouped_loci_gvc, gene_id, gene_symbol) %>%
  arrange(gene_symbol) %>%
  mutate(gene = gene_symbol) %>%
  # unite(gene, gene_id, gene_symbol, sep = ":", remove = FALSE) %>%
  # Each gene is listed once per locus.
  distinct(gvc_locus, gene) %>%
  group_by(gvc_locus) %>%
  summarise(genes = str_c(gene, collapse = " | ")) %>%
  gt::gt()
gvc_locus genes
ABCA7 ABCA7 | NDUFS7
ABI3 / ACE NGFR | ZNF652
ACE ACE
ADAM10 / MINDY2 ADAM10 | ALDH1A2 | LIPC
ADAMTS4 ADAMTS4 | FCER1G | NDUFS2
ANK3 / CCDC6 CCDC6 | SLC16A9
ANKRD31 ANKRD31 | ENC1
APH1B LACTB
APOE / TOMM40 APOC1 | APOE | BCAM | MARK4 | NECTIN2
APP MRPL39
APP / ADAMTS1 ADAMTS1
BCKDK / KAT8 / VKORC1 BCKDK | STX4 | VKORC1
BIN1 BIN1
CASS4 CASS4
CD2AP CD2AP
CD33 CD33
CHRNE ENO3 | RABEP1 | SLC25A11 | ZFP3
CLU / PTK2B CLU | EPHX2 | PTK2B | SCARA3
CR1 CR1
CTSH CTSH
DOC2A DOC2A
ECHDC3 / USP6NL USP6NL
EED / PICALM DLG2 | PICALM
EPHA1 / EPHA1-AS1 EPHA1
HAVCR2 CYFIP2 | HAVCR2
HLA HLA-DRA | HLA-DRB1
ICA1 NXPH1
IDUA CPLX1
IL34 MTSS2
INPP5D INPP5D
LILRB2 / TMC4 LAIR1
MADD / SPI1 C1QTNF4 | NDUFS3 | NR1H3 | RAPSN | SPI1
MS4A / MS4A2 / MS4A4A / MS4A6A MRPL16 | MS4A2 | MS4A4A | MS4A6A
NDUFAF7 / PRKD3 QPCT
NYAP1 / PILRA / SPDYE3 / ZCWPW1 NYAP1
OARD1 / TREM2 / TREML2 / UNC5CL TREM2
PLCG2 PLCG2 | SDR42E1
PLEKHA1 HTRA1
RABEP1 / SCIMP ENO3 | RABEP1 | SLC25A11 | ZFP3
RASGEF1C MAPK9
RIN3 / SLC24A4 RIN3 | SLC24A4
SHARPIN PLEC
SIGLEC11 NR1H2
WNT3 NSF

Methods for manuscript

Phase 2. Pathway analysis of GVC, Agora and OpenTargets candidate AD genes. We conducted gene set over-representation analysis (ORA) of GVC, Agora and OpenTargets candidate AD gene lists using R[37] with the gprofiler2 package (see https://github.com/marcoralab/gvc_agora_opentargets), excluding electronically inferred Gene Ontology (GO) annotations (evidence code IEA), and filtering results using a p-value significance threshold of 0.005 after multiple testing correction with the g:SCS algorithm.

We used the GVC gene list of 1,344 genes in the proximity of AD risk loci that we built as described above. We retrieved Agora’s gene list of Alzheimer’s disease nominated targets (site version 3.4.0; data version syn13363290-v68) from https://agora.adknowledgeportal.org/genes/nominated-targets and Agora’s gene scores (data version syn25741025-v12) from https://www.synapse.org/Synapse:syn25741025 on October 24th, 2024. We retrieved OpenTargets’ gene list of Alzheimer’s disease (EFO:MONDO_0004975) associated targets (data version v24_09) from https://platform.opentargets.org/disease/MONDO_0004975/associations on October 24th, 2024.

We conducted ORA using the following candidate AD gene lists: 1) Agora’s gene list of Alzheimer’s disease nominated targets, sorted in decreasing order of Agora’s genetics, multi-omics, or target risk score; 2) OpenTargets’ gene list of Alzheimer’s disease associated targets, sorted in decreasing order of OpenTargets’ genetics portal or global score; 3) the GVC gene list, sorted in decreasing order of the aforementioned Agora or OpenTargets scores; 4) lists corresponding to regions of the Venn diagram built using R[37] with the VennDiagram package (see https://github.com/marcoralab/gvc_agora_opentargets) from three sets corresponding to gene lists 1-3, sorted in decreasing order of the aforementioned Agora or OpenTargets scores.